Parameters

filtered_data <- read.csv("data/data-2023-09-11 (2).csv", header = TRUE)

selected_stats <- c("Ho", "Hs", "Ht", "Fis (W&C)", "Fst (W&C)", "Fis (Nei)", "Fst (Nei)"
)

n_rep=10

n_pop = 8

sequence_length <- length(6:11) 

data filtering convertion

Run basic.stats and render the result

library(pegas)
library(dplyr)
library(tibble)

result <- basic.stats(mydata_hierfstat)
df_resutl_basic<-as.data.frame(result$perloc)

# Weir and Cockrham estimates of Fstatistics - FIS and FST 
result_f_stats <- Fst(as.loci(mydata_genind), na.alleles = "")
result_f_stats <- result_f_stats[,2:3]
colnames(result_f_stats) <- c("Fst (W&C)", "Fis (W&C)")
result_f_stats <- merge(result_f_stats, df_resutl_basic, by="row.names",all.x=TRUE)
colnames(result_f_stats)[10] <- "Fst (Nei)"
colnames(result_f_stats)[12] <- "Fis (Nei)"
result_f_stats <- result_f_stats %>% column_to_rownames(., var = 'Row.names')
result_f_stats_selec <- result_f_stats %>% select(all_of(selected_stats))
result_f_stats_selec

G-statistic

library(hrbrthemes)
library(ggplot2)

# compute the Gstats
result_f_stats <- result_f_stats %>% mutate(GST = 1-Hs/Ht)
result_f_stats <- result_f_stats %>% mutate("GST''" = (n_pop*(Ht-Hs))/((n_pop*Hs-Ht)*(1-Hs)))
result_f_stats

# Plot with linear trend
p2 <- ggplot(result_f_stats, aes(x=GST, y=Hs)) +
  geom_point() +
  geom_smooth(method=lm , color="red", se=FALSE) +
  theme_ipsum()
p2

Missing data

# Libraries
library("poppr")
This is poppr version 2.9.4. To get started, type package?poppr
OMP parallel support: unavailable
library("heatmaply")
Loading required package: plotly
Registered S3 method overwritten by 'data.table':
  method           from
  print.data.table     
Registered S3 method overwritten by 'htmlwidgets':
  method           from         
  print.htmlwidget tools:rstudio

Attaching package: ‘plotly’

The following object is masked from ‘package:ggplot2’:

    last_plot

The following object is masked from ‘package:stats’:

    filter

The following object is masked from ‘package:graphics’:

    layout

Loading required package: viridis
Loading required package: viridisLite
Registered S3 method overwritten by 'dendextend':
  method     from 
  rev.hclust vegan
Registered S3 method overwritten by 'seriation':
  method         from 
  reorder.hclust vegan

======================
Welcome to heatmaply version 1.5.0

Type citation('heatmaply') for how to cite the package.
Type ?heatmaply for the main documentation.

The github page is: https://github.com/talgalili/heatmaply/
Please submit your suggestions and bug-reports at: https://github.com/talgalili/heatmaply/issues
You may ask questions at stackoverflow, use the r and heatmaply tags: 
     https://stackoverflow.com/questions/tagged/heatmaply
======================
missing_data <- info_table(mydata_genind, plot = FALSE)

# Matrix format
mat <- as.matrix(missing_data)
# heatmap
p <- heatmaply(mat, 
               dendrogram = "none",
               xlab = "", ylab = "", 
               main = "",
               scale = "column",
               margins = c(60,100,40,20),
               grid_color = "white",
               grid_width = 0.00001,
               titleX = FALSE,
               hide_colorbar = TRUE,
               branches_lwd = 0.1,
               label_names = c("Population", "Marker", "Value"),
               fontsize_row = 8, fontsize_col = 8,
               labCol = colnames(mat),
               labRow = rownames(mat),
               heatmap_layers = theme(axis.line=element_blank())
)
p
p

HW - Panmixia

genepop_HWtable <- HWtable_analysis(
  inputFile = "data/mydata.genepop.txt",
  which = "Proba",
  settingsFile = "",
  enumeration = FALSE,
  dememorization = 10000,
  batches = 20,
  iterations = 5000,
  verbose = interactive()
)
Error: Cannot read correctly. Check second line of input

Linkage Disequilibrium

pair.ia(mydata_genind, sample = 500, quiet = FALSE, method = 3 )

  |                                                                                                                 
  |                                                                                                           |   0%
                                                                                                                    

  |                                                                                                                 
  |                                                                                                           |   0%
  |                                                                                                                 
  |==                                                                                                         |   2%
  |                                                                                                                 
  |====                                                                                                       |   4%
                                                                                                                    
             Ia    p.Ia   rbarD    p.rD
B12:C07 -0.0039  0.9421 -0.0039  0.9421
B12:D12  0.0545  0.0020  0.0547  0.0020
B12:D10  0.0204  0.0020  0.0204  0.0020
B12:A12  0.0356  0.0020  0.0356  0.0020
B12:C03  0.0531  0.0020  0.0534  0.0020
C07:D12 -0.0233  1.0000 -0.0233  1.0000
C07:D10  0.0172  0.0020  0.0172  0.0020
C07:A12  0.0364  0.0020  0.0365  0.0020
C07:C03  0.1252  0.0020  0.1254  0.0020
D12:D10  0.0708  0.0020  0.0709  0.0020
D12:A12  0.0244  0.0020  0.0245  0.0020
D12:C03 -0.0135  1.0000 -0.0135  1.0000
D10:A12  0.0170  0.0020  0.0170  0.0020
D10:C03  0.0456  0.0020  0.0458  0.0020
A12:C03  0.0284  0.0020  0.0287  0.0020

shuffle df


#shuffled_matrices <- replicate(n_rep, mat[sample(nrow(mat)), ], simplify = FALSE)
shuffled_matrices <- replicate(n_rep, mat[sample(length(mat), replace = FALSE)], simplify = FALSE)
##################
# shuffle only the genotype and add the pop column later for each matrices. 
#in the loop? 
###############


# Create a list to store the wc
fst_df <- numeric(sequence_length)
fis_df <- numeric(sequence_length)

# Calculate the average for each shuffled matrix

# Iterate through the shuffled matrices
for (i in 1:n_rep) {
  # Calculate the statistics for the i-th matrix
  #HERE THE COLUMN POP
  merged_df <- cbind(level_pop, shuffled_matrices[[i]])
  result_f_stats <- wc(shuffled_matrices[[i]]) 
  result_f_stats <- as.data.frame(result_f_stats$per.loc)
  # Extract FST and FIS values
  fst_values <- result_f_stats$FST
  fis_values <- result_f_stats$FIS
  print( fst_values)
  # Assign values to the data frames
  fst_df <- cbind(fst_df, result_f_stats$FST)
  fis_df <- cbind(fis_df, result_f_stats$FIS)
}

# Set row names as in result_f_stats

rownames(fst_df) <- rownames(fis_df) <- rownames(result_f_stats)
result_FST <- fst_df[, -1]
fis_df <-fis_df[, -1]
vec <- seq(1, n_rep)
colnames(result_FST) <- colnames(fis_df) <- vec


result_FST[1,] 

count (result_f_stats[,1][1] > result_FST[1,] )
count <- sum(result_f_stats[,1][1] > result_FST[1, ])



# Initialize an empty data frame to store the counts
count_df <- data.frame(
  Greater = numeric(length(result_FST)),
  Smaller = numeric(length(result_FST))
)

# Compare the values in result_f_stats[1] to result_FST for each column
for (col in colnames(result_FST)) {
  greater_count <- sum(result_f_stats[1] > result_FST[, col])
  smaller_count <- sum(result_f_stats[1] < result_FST[, col])
  count_df$Greater[col] <- greater_count
  count_df$Smaller[col] <- smaller_count
}

# Print the count data frame
print(count_df)

######################## ######################## 

---
title: "R Notebook"
output: html_notebook
editor_options: 
  chunk_output_type: inline
---

# Parameters 

```{r}
filtered_data <- read.csv("data/data-2023-09-11 (2).csv", header = TRUE)

selected_stats <- c("Ho", "Hs", "Ht", "Fis (W&C)", "Fst (W&C)", "Fis (Nei)", "Fst (Nei)"
)

n_rep=10

n_pop = 8

sequence_length <- length(6:11) 
```



# data filtering convertion

```{r}
library(hierfstat)
library(adegenet)
filtered_data <- data.frame(indv = paste(substr(filtered_data$Population,1,3), row.names(filtered_data), sep="."), filtered_data)
# Create mydata_genind
population <- filtered_data$Population
mydata_genind <- df2genind(
  X = filtered_data[,6:11],
  sep = "/",
  ncode = 6,
  ind.names = filtered_data$indv,
  pop = filtered_data$Population,
  NA.char = "0/0",
  ploidy = 2,
  type = "codom",
  strata = NULL,
  hierarchy = NULL
)
mydata_genind
mydata_hierfstat <- genind2hierfstat(mydata_genind)
```



# Run basic.stats and render the result

```{r}
library(pegas)
library(dplyr)
library(tibble)

result <- basic.stats(mydata_hierfstat)
df_resutl_basic<-as.data.frame(result$perloc)

# Weir and Cockrham estimates of Fstatistics - FIS and FST 
result_f_stats <- Fst(as.loci(mydata_genind), na.alleles = "")
result_f_stats <- result_f_stats[,2:3]
colnames(result_f_stats) <- c("Fst (W&C)", "Fis (W&C)")
result_f_stats <- merge(result_f_stats, df_resutl_basic, by="row.names",all.x=TRUE)
colnames(result_f_stats)[10] <- "Fst (Nei)"
colnames(result_f_stats)[12] <- "Fis (Nei)"
result_f_stats <- result_f_stats %>% column_to_rownames(., var = 'Row.names')
result_f_stats_selec <- result_f_stats %>% select(all_of(selected_stats))
result_f_stats_selec
```

# G-statistic

```{r}
library(hrbrthemes)
library(ggplot2)

# compute the Gstats
result_f_stats <- result_f_stats %>% mutate(GST = 1-Hs/Ht)
result_f_stats <- result_f_stats %>% mutate("GST''" = (n_pop*(Ht-Hs))/((n_pop*Hs-Ht)*(1-Hs)))
result_f_stats

# Plot with linear trend
p2 <- ggplot(result_f_stats, aes(x=GST, y=Hs)) +
  geom_point() +
  geom_smooth(method=lm , color="red", se=FALSE) +
  theme_ipsum()
p2

```

# Missing data

```{r}
# Libraries
library("poppr")
library("heatmaply")

missing_data <- info_table(mydata_genind, plot = FALSE)

# Matrix format
mat <- as.matrix(missing_data)
# heatmap
p <- heatmaply(mat, 
               dendrogram = "none",
               xlab = "", ylab = "", 
               main = "",
               scale = "column",
               margins = c(60,100,40,20),
               grid_color = "white",
               grid_width = 0.00001,
               titleX = FALSE,
               hide_colorbar = TRUE,
               branches_lwd = 0.1,
               label_names = c("Population", "Marker", "Value"),
               fontsize_row = 8, fontsize_col = 8,
               labCol = colnames(mat),
               labRow = rownames(mat),
               heatmap_layers = theme(axis.line=element_blank())
)
p
```



# HW - Panmixia

```{r}


library("pegas")

hw.test(as.loci(mydata_genind), B = 1000)


library(genepop)
#https://cran.r-project.org/web/packages/genepop/genepop.pdf
genepop_HW <- test_HW(
  inputFile = "data/mydata.genepop.txt",
  which = "Proba",
  outputFile = "",
  settingsFile = "",
  enumeration = FALSE,
  dememorization = 10000,
  batches = 20,
  iterations = 5000,
  verbose = interactive()
)

genepop_HWtable <- HWtable_analysis(
  inputFile = "data/mydata.genepop.txt",
  which = "Proba",
  settingsFile = "",
  enumeration = FALSE,
  dememorization = 10000,
  batches = 20,
  iterations = 5000,
  verbose = interactive()
)
```



# Linkage Disequilibrium

```{r}

LD(as.loci(mydata_genind$tab), locus = c(1, 2), details = TRUE)

a<-LDscan(as.loci(mydata_hierfstat[,2:7]))

LDmap(a)


head(mydata_hierfstat[,2:7])


library(radiator)
#https://thierrygosselin.github.io/radiator/reference/genomic_converter.html
#https://thierrygosselin.github.io/radiator/articles/rad_genomics_computer_setup.html

mydata1 <- genomic_converter(mydata_genind, parallel.core = parallel::detectCores() - 1, output = "genepop", filename = "../data/mydata.genepop.txt")
mydata1 <- tidy_genind(mydata_genind, tidy = TRUE, write = FALSE, verbose = TRUE)

detect_all_missing()


library(genepop)


test_LD(
  inputFile = "data/mydata.genepop.txt",
  outputFile = "",
  settingsFile = "",
  dememorization = 10000,
  batches = 100,
  iterations = 5000,
  verbose = interactive()
)

#Fst(
#  inputFile = "data/mydata.genepop.txt",
#  sizes = FALSE,
#  pairs = FALSE,
#  outputFile = "",
#  dataType = "Diploid",
#  verbose = interactive()
#)

library("poppr") 
pair.ia(mydata_genind, sample = 500, quiet = FALSE, method = 3 )
# working but different values

```


# shuffle df

```{r}

#shuffled_matrices <- replicate(n_rep, mat[sample(nrow(mat)), ], simplify = FALSE)
shuffled_matrices <- replicate(n_rep, mat[sample(length(mat), replace = FALSE)], simplify = FALSE)
##################
# shuffle only the genotype and add the pop column later for each matrices. 
#in the loop? 
###############


# Create a list to store the wc
fst_df <- numeric(sequence_length)
fis_df <- numeric(sequence_length)

# Calculate the average for each shuffled matrix

# Iterate through the shuffled matrices
for (i in 1:n_rep) {
  # Calculate the statistics for the i-th matrix
  #HERE THE COLUMN POP
  merged_df <- cbind(level_pop, shuffled_matrices[[i]])
  result_f_stats <- wc(shuffled_matrices[[i]]) 
  result_f_stats <- as.data.frame(result_f_stats$per.loc)
  # Extract FST and FIS values
  fst_values <- result_f_stats$FST
  fis_values <- result_f_stats$FIS
  print( fst_values)
  # Assign values to the data frames
  fst_df <- cbind(fst_df, result_f_stats$FST)
  fis_df <- cbind(fis_df, result_f_stats$FIS)
}

# Set row names as in result_f_stats

rownames(fst_df) <- rownames(fis_df) <- rownames(result_f_stats)
result_FST <- fst_df[, -1]
fis_df <-fis_df[, -1]
vec <- seq(1, n_rep)
colnames(result_FST) <- colnames(fis_df) <- vec


result_FST[1,] 

count (result_f_stats[,1][1] > result_FST[1,] )
count <- sum(result_f_stats[,1][1] > result_FST[1, ])



# Initialize an empty data frame to store the counts
count_df <- data.frame(
  Greater = numeric(length(result_FST)),
  Smaller = numeric(length(result_FST))
)

# Compare the values in result_f_stats[1] to result_FST for each column
for (col in colnames(result_FST)) {
  greater_count <- sum(result_f_stats[1] > result_FST[, col])
  smaller_count <- sum(result_f_stats[1] < result_FST[, col])
  count_df$Greater[col] <- greater_count
  count_df$Smaller[col] <- smaller_count
}

# Print the count data frame
print(count_df)

######################## ######################## 

```



#

```{r}


```

#

```{r}


```

#

```{r}


```

#

```{r}


```

#

```{r}


```

#

```{r}


```

#

```{r}


```

#

```{r}


```
